{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "bb85d27f-e18b-4edf-a6f1-da6695a75046",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import numpy as np\n",
    "from sklearn.metrics import silhouette_score\n",
    "from sklearn.cluster import KMeans\n",
    "import matplotlib.pyplot as plt\n",
    "from tqdm import tqdm  \n",
    "from sklearn.cluster import MiniBatchKMeans"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c5fcbf78-c7a0-427b-923b-21b60a79820a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 数据准备\n",
    "target = 3 \n",
    "data_name = ['0618', '0854', '1066'][target - 1]\n",
    "#基于领域知识\n",
    "features_bgr1x1 = np.load(f'./Features/{data_name}_BGR1x1_featuers.npy')\n",
    "features_hsv1x1 = np.load(f'./Features/{data_name}_HSV1x1_features.npy')\n",
    "features_bgr3x3 = np.load(f'./Features/{data_name}_BGR3x3_featuers.npy')\n",
    "features_hsv3x3 = np.load(f'./Features/{data_name}_HSV3x3_features.npy')\n",
    "features_hog = np.load(f'./Features/{data_name}_HOG_featuers.npy')\n",
    "#基于数据驱动\n",
    "features_pca = np.load(f'./Features/{data_name}_PCA3x3_features.npy')\n",
    "features_DicL = np.load(f'./Features/{data_name}_DictL_features.npy')\n",
    "\n",
    "# 将数据从 NumPy 转换为 PyTorch 张量，并将其转移到 GPU\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "features_bgr1x1 = torch.tensor(features_bgr1x1, dtype=torch.float32, device=device)\n",
    "features_hsv1x1 = torch.tensor(features_hsv1x1, dtype=torch.float32, device=device)\n",
    "features_bgr3x3 = torch.tensor(features_bgr3x3, dtype=torch.float32, device=device)\n",
    "features_hsv3x3 = torch.tensor(features_hsv3x3, dtype=torch.float32, device=device)\n",
    "features_hog = torch.tensor(features_hog, dtype=torch.float32, device=device)\n",
    "\n",
    "features_pca = torch.tensor(features_pca, dtype=torch.float32, device=device)\n",
    "features_DicL = torch.tensor(features_DicL, dtype=torch.float32, device=device)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "ad44125e-912c-47ad-945b-5bdc5206c92f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def optimal_c_value(features, max_c=15):\n",
    "    sse = []  # 存储SSE（聚类内误差平方和）\n",
    "    \n",
    "    # 计算不同C值下的SSE，Elbow方法选择合适的C值\n",
    "    for c in tqdm(range(2, max_c + 1), desc=\"Calculating optimal C\"):\n",
    "        kmeans = MiniBatchKMeans(n_clusters=c, random_state=42, batch_size=200)\n",
    "        kmeans.fit(features.cpu().numpy())  # 转换为 CPU 上的 NumPy 数组\n",
    "        sse.append(kmeans.inertia_)  # 读取当前聚类的SSE存入列表\n",
    "\n",
    "    # # 绘制 Elbow 图，选择拐点\n",
    "    # plt.plot(range(2, max_c + 1), sse)\n",
    "    # plt.xlabel('Number of clusters (C)')\n",
    "    # plt.ylabel('SSE')\n",
    "    # plt.title('Elbow Method For Optimal C')\n",
    "    # plt.show()\n",
    "\n",
    "    # 找到 Elbow 的最佳 C 值\n",
    "    optimal_c = np.argmin(np.diff(sse)) + 2  # 选择拐点的前一个值\n",
    "    return optimal_c\n",
    "\n",
    "def Kmeans(features, optimal_c):\n",
    "    # 使用 PyTorch 进行 GPU 加速的 KMeans 聚类\n",
    "    features_gpu = features.to(device)\n",
    "    num_samples, num_features = features_gpu.shape\n",
    "    kmeans = KMeans(n_clusters=optimal_c, random_state=42)\n",
    "    labels = kmeans.fit_predict(features_gpu.cpu().numpy())  # 将数据从 GPU 转回 CPU 进行处理\n",
    "\n",
    "    centroids = torch.tensor(kmeans.cluster_centers_, dtype=torch.float32, device=device)\n",
    "\n",
    "    with_d, between_d, cluster_score = cluster_eval(features_gpu, labels, optimal_c, centroids)\n",
    "    print(\"features.shape: \", features.shape)\n",
    "    print('类内间距: ', with_d)\n",
    "    print('类外间距: ', between_d)\n",
    "    print('聚类效果评价参数: ', cluster_score)\n",
    "\n",
    "    return kmeans, labels\n",
    "\n",
    "def cluster_eval(data, labels, optimal_c, centroids):\n",
    "    K = optimal_c\n",
    "\n",
    "    # 计算类内间距\n",
    "    within_distances = []\n",
    "    for i in range(K):\n",
    "        cluster_points = data[labels == i]\n",
    "        centroid = centroids[i]\n",
    "\n",
    "        # 计算每个点到中心的平均距离\n",
    "        if cluster_points.shape[0] > 0:\n",
    "            distance = torch.mean(torch.norm(cluster_points - centroid, dim=1))\n",
    "            within_distances.append(distance.item())\n",
    "\n",
    "    total_within_distance = torch.mean(torch.tensor(within_distances, device=device)).item()\n",
    "\n",
    "    # 计算类外间距\n",
    "    between_distances = []\n",
    "    for i in range(K):\n",
    "        for j in range(i + 1, K):\n",
    "            distance = torch.norm(centroids[i] - centroids[j])\n",
    "            between_distances.append(distance.item())\n",
    "\n",
    "    total_between_distance = torch.mean(torch.tensor(between_distances, device=device)).item()\n",
    "\n",
    "    # 使用最大类间距（可选）\n",
    "    max_between_distance = torch.max(torch.tensor(between_distances)).item()\n",
    "\n",
    "    # 聚类效果参数\n",
    "    cluster_score = total_between_distance / total_within_distance\n",
    "\n",
    "    return total_within_distance, total_between_distance, cluster_score\n",
    "\n",
    "    \n",
    "# def cluster_eval(data, labels, optimal_c, centroids):\n",
    "#     K = optimal_c\n",
    "\n",
    "#     # 计算类内间距\n",
    "#     within_distances = []\n",
    "#     for i in range(K):\n",
    "#         cluster_points = data[labels == i]\n",
    "#         centroid = centroids[i]\n",
    "\n",
    "#         # 计算每个点到中心的距离并求和\n",
    "#         distance = torch.sum(torch.norm(cluster_points - centroid, dim=1))  # 使用 PyTorch 的 norm 函数\n",
    "#         within_distances.append(distance)\n",
    "\n",
    "#     total_within_distance = torch.mean(torch.tensor(within_distances, device=device)).item()\n",
    "\n",
    "#     # 计算类外间距\n",
    "#     between_distances = []\n",
    "#     for i in range(K):\n",
    "#         for j in range(i + 1, K):\n",
    "#             distance = torch.norm(centroids[i] - centroids[j])  # 计算中心点之间的距离\n",
    "#             between_distances.append(distance.item())\n",
    "\n",
    "#     total_between_distance = torch.mean(torch.tensor(between_distances, device=device)).item()\n",
    "\n",
    "#     cluster_score = total_between_distance / total_within_distance\n",
    "\n",
    "#     return total_within_distance, total_between_distance, cluster_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e52dce36-18f9-4412-8349-2d0e7709d69d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "BGR1x1特征聚类分析:\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Calculating optimal C: 100%|█| 14/14 [00:00<00:00, 52.45it/s\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "features.shape:  torch.Size([1250, 3])\n",
      "类内间距:  62.884315490722656\n",
      "类外间距:  284.2486267089844\n",
      "聚类效果评价参数:  4.520183204521319\n",
      "optimal C-value with BGR1x1: 2\n",
      "HSV1x1特征聚类分析:\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Calculating optimal C: 100%|█| 14/14 [00:00<00:00, 136.63it/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "features.shape:  torch.Size([1250, 3])\n",
      "类内间距:  62.884315490722656\n",
      "类外间距:  284.2486267089844\n",
      "聚类效果评价参数:  4.520183204521319\n",
      "optimal C-value with HSV1x1: 2\n",
      "BGR3x3特征聚类分析:\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Calculating optimal C: 100%|█| 14/14 [00:00<00:00, 106.72it/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "features.shape:  torch.Size([1250, 27])\n",
      "类内间距:  193.79135131835938\n",
      "类外间距:  842.4631958007812\n",
      "聚类效果评价参数:  4.347269318622931\n",
      "optimal C-value with BGR3x3: 2\n",
      "HSV3x3特征聚类分析:\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Calculating optimal C: 100%|█| 14/14 [00:00<00:00, 131.45it/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "features.shape:  torch.Size([1250, 27])\n",
      "类内间距:  139.42575073242188\n",
      "类外间距:  434.68865966796875\n",
      "聚类效果评价参数:  3.117707147958622\n",
      "optimal C-value with HSV3x3: 3\n",
      "HOG特征聚类分析:\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Calculating optimal C: 100%|█| 14/14 [00:00<00:00, 146.09it/"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "features.shape:  torch.Size([1830, 36])\n",
      "类内间距:  0.5216720700263977\n",
      "类外间距:  0.4928678572177887\n",
      "聚类效果评价参数:  0.9447848285089301\n",
      "optimal C-value with HOG: 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# 基于领域知识\n",
    "print(\"BGR1x1特征聚类分析:\")\n",
    "optimal_c_bgr1x1 = optimal_c_value(features_bgr1x1)\n",
    "kmeans_bgr1x1, labels_bgr1x1 = Kmeans(features_bgr1x1, optimal_c_bgr1x1)\n",
    "print(f\"optimal C-value with BGR1x1: {optimal_c_bgr1x1}\")\n",
    "\n",
    "print(\"HSV1x1特征聚类分析:\")\n",
    "optimal_c_bgr1x1 = optimal_c_value(features_bgr1x1)\n",
    "kmeans_bgr1x1, labels_bgr1x1 = Kmeans(features_bgr1x1, optimal_c_bgr1x1)\n",
    "print(f\"optimal C-value with HSV1x1: {optimal_c_bgr1x1}\")\n",
    "\n",
    "print(\"BGR3x3特征聚类分析:\")\n",
    "optimal_c_bgr3x3 = optimal_c_value(features_bgr3x3)\n",
    "kmeans_bgr3x3, labels_bgr3x3 = Kmeans(features_bgr3x3, optimal_c_bgr3x3)\n",
    "print(f\"optimal C-value with BGR3x3: {optimal_c_bgr3x3}\")\n",
    "\n",
    "print(\"HSV3x3特征聚类分析:\")\n",
    "optimal_c_hsv3x3 = optimal_c_value(features_hsv3x3)\n",
    "kmeans_hsv3x3, labels_hsv3x3 = Kmeans(features_hsv3x3, optimal_c_hsv3x3)\n",
    "print(f\"optimal C-value with HSV3x3: {optimal_c_hsv3x3}\")\n",
    "\n",
    "print(\"HOG特征聚类分析:\")\n",
    "optimal_c_hog = optimal_c_value(features_hog)\n",
    "kmeans_hog, labels_hog = Kmeans(features_hog, optimal_c_hog)\n",
    "print(f\"optimal C-value with HOG: {optimal_c_hog}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "ab0aa9df-b709-4dca-a19a-9d9f9d12fa8f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PCA3x3特征聚类分析:\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Calculating optimal C: 100%|█| 14/14 [00:00<00:00, 141.83it/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "features.shape:  torch.Size([1250, 26])\n",
      "类内间距:  2.2967076301574707\n",
      "类外间距:  9.920421600341797\n",
      "聚类效果评价参数:  4.3194098674464785\n",
      "optimal C-value with PCA3x3: 2\n",
      "字典特征聚类分析:\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Calculating optimal C: 100%|█| 14/14 [00:00<00:00, 159.90it/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "features.shape:  torch.Size([1922, 64])\n",
      "类内间距:  2279.177734375\n",
      "类外间距:  9922.7265625\n",
      "聚类效果评价参数:  4.3536431638671775\n",
      "optimal C-value with DicL: 2\n"
     ]
    }
   ],
   "source": [
    "#基于数据驱动\n",
    "print(\"PCA3x3特征聚类分析:\")\n",
    "optimal_c_pca = optimal_c_value(features_pca)\n",
    "kmeans_pca, labels_pca = Kmeans(features_pca, optimal_c_pca)\n",
    "print(f\"optimal C-value with PCA3x3: {optimal_c_pca}\")\n",
    "\n",
    "print(\"字典特征聚类分析:\")\n",
    "optimal_c_DicL = optimal_c_value(features_DicL)\n",
    "kmeans_DicL, labels_DicL = Kmeans(features_DicL, optimal_c_DicL)\n",
    "print(f\"optimal C-value with DicL: {optimal_c_DicL}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.20"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
